package cn.edu360.report

import cn.edu360.utils.{ConfigHandler, FileHandler, MySQLHandler}
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
import org.apache.spark.{SparkConf, SparkContext}

object LogDataAnalysis {
  /**
   * Batch job entry point.
   *
   * Reads the raw request log from the configured Parquet path, counts records
   * grouped by (provincename, cityname), and writes the aggregate to MySQL via
   * [[MySQLHandler.save2db]]. Alternative JSON/JDBC sinks are kept commented out.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
    sparkConf.setMaster("local[*]")
    sparkConf.setAppName("统计日志文件中各省市的数据分布情况")
    // Kryo serialization: faster and more compact than default Java serialization.
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(sparkConf)
    // Ensure the SparkContext is always released, even when a stage fails.
    try {
      val sQLContext = new SQLContext(sc)
      // Read the source data.
      val rawDataFrame = sQLContext.read.parquet(ConfigHandler.parquetPath)
      // Expose the DataFrame to Spark SQL under the table name "log".
      rawDataFrame.registerTempTable("log")
      // Group-and-count by province and city.
      // stripMargin only strips text up to a '|' marker, so each line needs one.
      val result: DataFrame = sQLContext.sql(
        """select count(*) ct, provincename, cityname
          |from log
          |group by provincename, cityname
        """.stripMargin)
      // Clear any stale output directory before (optionally) writing JSON.
      FileHandler.deleteWillOutputDir(sc, ConfigHandler.logdataAnalysisResultJsonPath)
      // Alternative sink: write the result out as JSON.
      //result.coalesce(1).write.json(ConfigHandler.logdataAnalysisResultJsonPath)

      // Alternative sink: write directly through the DataFrame JDBC API.
      //result.write.mode(SaveMode.Overwrite).jdbc(ConfigHandler.url,ConfigHandler.logDataAnalysisTableName,ConfigHandler.props)
      MySQLHandler.save2db(result, ConfigHandler.logDataAnalysisTableName)
    } finally {
      sc.stop()
    }
  }
}
